In [69]:
import pandas as pd

In [3]:
import ghgmosh as gm

In [174]:
reload(gm)


Out[174]:
<module 'ghgmosh' from 'ghgmosh.py'>

get data from compustat


In [16]:
usa = pd.io.excel.read_excel("../CDPdata/s12_american.xlsx")
world = pd.io.excel.read_excel("../CDPdata/s12_world.xlsx")

In [5]:
world.columns


Out[5]:
Index([u'Global Company Key', u'Industry Format', u'Data Format', u'Level of Consolidation - Company Annual Descriptor', u'Population Source', u'Data Year - Fiscal', u'Fiscal Year-end Month', u'Data Date', u'Assets - Total', u'Cost of Goods Sold', u'Property, Plant and Equipment - Total (Net)', u'Revenue - Total', u'Stockholders Equity - Parent', u'International Security Identification Number', u'Net Income (Loss) - Consolidated', u'Company Name', u'Active/Inactive Status Marker', u'Current ISO Country Code - Incorporation', u'Current ISO Country Code - Headquarters'], dtype='object')

In [6]:
usa.columns


Out[6]:
Index([u'Global Company Key', u'Data Date', u'Data Year - Fiscal', u'Industry Format', u'Level of Consolidation - Company Annual Descriptor', u'Population Source', u'Data Format', u'Ticker Symbol', u'ISO Currency Code', u'Fiscal Year-end Month', u'Assets - Total', u'Cost of Goods Sold', u'Net Income (Loss)', u'Property, Plant and Equipment - Total (Net)', u'Revenue - Total', u'Stockholders Equity - Parent', u'Active/Inactive Status Marker'], dtype='object')

In [17]:
world.rename(columns={"International Security Identification Number":"ISIN", 
                      'Net Income (Loss) - Consolidated': "income"}, inplace=True)
world = world[world["ISIN"].isnull()==False]
world = world[world["Revenue - Total"].isnull()==False]
len(world["ISIN"].value_counts().index) # 900
# chinese companies didn't return data


Out[17]:
900

In [18]:
usa.rename(columns={"Ticker Symbol":"Ticker",
                    'Net Income (Loss)':"income"}, inplace=True)
len(usa['Ticker'].value_counts().index) # 347


Out[18]:
347

In [19]:
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl")

In [20]:
orginfos["Ticker"] = orginfos["Ticker"].apply(lambda(x): str(x).split(" ")[0])
orginfos_byticker = orginfos.reset_index().set_index("Ticker")
orginfos_byticker = orginfos_byticker[orginfos_byticker["Country"]== "USA"]
orginfos_byisin = orginfos.reset_index().set_index("ISIN")

In [21]:
usa.set_index("Ticker", inplace=True)
world.set_index("ISIN", inplace=True)

In [22]:
usainfos = usa.join(orginfos_byticker[["Organisation"]])
worldinfos = world.join(orginfos_byisin[["Organisation"]])

In [23]:
len(usainfos["Organisation"].value_counts().index) # 347
len(worldinfos["Organisation"].value_counts().index) # 900 perfect


Out[23]:
900

In [24]:
usainfos = usainfos.reset_index().set_index("Organisation")
worldinfos = worldinfos.reset_index().set_index("Organisation")

In [31]:
s12_financials = pd.concat([usainfos, worldinfos])
s12_financials.rename(columns={"Data Year - Fiscal":"year", "Revenue - Total":"Revenues", "Cost of Goods Sold":"COGS",
                               'Stockholders Equity - Parent': "Equity", 'Assets - Total': "Assets", "income": "Income",
                               'Property, Plant and Equipment - Total (Net)':"PPE"},inplace=True)
s12_financials = s12_financials.reset_index().set_index(["Organisation","year"])
s12_financials.to_pickle("../CDPdata/s12_financials.pkl")

In [27]:
s12_financials["ISO Currency Code"].value_counts() # all USD thank goodness


Out[27]:
USD    2055
dtype: int64

In [70]:
# combine with s12_completed to compute intensities
s12_financials = pd.read_pickle("../CDPdata/s12_financials.pkl")
scopes12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")

In [71]:
s12_ghgfins = scopes12_c[["Country", "GICS Industry",
                          "GICS Sector", "scope1", "scope2"]].join(s12_financials[fm.FINCOLS], how="inner")

In [72]:
s12_ghgfins["1and2 total"] = s12_ghgfins["scope1"]+ s12_ghgfins["scope2"]
s12_ghgfins["GICS Sector"].replace("Banks", "Financials", inplace=True)

In [36]:
import finmosh as fm

In [73]:
reload(fm)


Out[73]:
<module 'finmosh' from 'finmosh.pyc'>

In [74]:
s = s12_ghgfins.reset_index()
s12_ghgfins = fm.adjust_inflation_all(s)

In [75]:
for cols in fm.FINCOLS:
    s12_ghgfins[cols].fillna(0, inplace=True)

In [76]:
s12_ghgfins["1and2 intensity"] = s12_ghgfins["1and2 total"]/s12_ghgfins["Revenues"]
s12_ghgfins = s12_ghgfins[s12_ghgfins["1and2 intensity"].isnull() == False]
s12_ghgfins.set_index(["Organisation", "year"], inplace=True)
s12_ghgfins.sort_index(inplace=True)

In [78]:
len(s12_ghgfins.index.levels[0].value_counts().index) #1243 instead of 1247 requested from COMPUSTAT
len(s12_ghgfins) # 4673


Out[78]:
1243

In [79]:
# revs and cogs are in millions USD jan 2015
# intensity is in tons/million
s12_ghgfins.to_pickle("../CDPdata/s12_ghgfins.pkl")

In [80]:
s12_ghgfins.head()


Out[80]:
Country GICS Industry GICS Sector scope1 scope2 Revenues COGS Equity PPE Assets Income 1and2 total 1and2 intensity
Organisation year
3M Company 2009 USA Industrial Conglomerates Industrials 3290000 1690000 24939.230138 11709.779078 13766.567205 7549.825324 29390.391440 3443.798894 4980000 199.685394
2010 USA Industrial Conglomerates Industrials 4300000 1950000 28294.483474 13489.279853 16622.027404 7724.684765 32002.417059 4335.119833 6250000 220.891115
2011 USA Industrial Conglomerates Industrials 4060000 2030000 30530.950861 14906.148276 15899.066640 7904.166334 32598.241952 4416.063711 6090000 199.469713
2012 USA Industrial Conglomerates Industrials 4540000 2230000 30349.027827 14611.254468 17836.549092 8502.680415 34380.138666 4510.135088 6770000 223.071396
2013 USA Industrial Conglomerates Industrials 4550000 2230000 30843.417282 14721.834526 17486.362258 8644.269584 33520.023641 4654.837262 6780000 219.820000

In [83]:
s = s12_ghgfins.reset_index()
s[s.duplicated(["Organisation", "year"])]


Out[83]:
Organisation year Country GICS Industry GICS Sector scope1 scope2 Revenues COGS Equity PPE Assets Income 1and2 total 1and2 intensity
407 Associated British Foods 2012 United Kingdom Food Products Consumer Staples 2295328.00 1067934.00 12434.332830 8906.603405 5920.820905 4608.578630 10392.390481 563.259445 3363262.00 270.481902
1657 Gold Fields Limited 2009 South Africa Metals & Mining Materials 1308764.00 5093511.00 16698.811506 10486.815230 0.000000 0.000000 0.000000 2605.552574 6402275.00 383.397046
1659 Gold Fields Limited 2010 South Africa Metals & Mining Materials 1377194.00 5164897.00 19429.083823 11535.451426 46208.132505 56510.223767 76211.406666 -80.971761 6542091.00 336.716392
2614 Massmart Holdings Ltd 2012 South Africa Food & Staples Retailing Consumer Staples 25673.73 298522.25 36660.172304 29962.560806 4810.235661 3925.766100 23362.377969 702.095287 324195.98 8.843275
4210 Tongaat Hulett Ltd 2009 South Africa Food Products Consumer Staples 787711.00 309388.00 12010.693544 9820.165654 4932.193030 8315.593321 14418.009276 3125.627684 1097099.00 91.343518

graph histograms of raw data

it should vary the most by sector


In [11]:
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter
from bokeh.charts import Histogram
from bokeh.plotting import output_notebook, show
output_notebook()


BokehJS successfully loaded.

In [12]:
import datavis as dv

In [70]:
ghgfins_s = s12_ghgfins.reset_index().set_index(["GICS Sector","year"])
ghgfins_yr = s12_ghgfins.reset_index().set_index("year")

In [71]:
ghgfins_s.index.levels[0]


Out[71]:
Index([u'Consumer Discretionary', u'Consumer Staples', u'Energy', u'Financials', u'Health Care', u'Industrials', u'Information Technology', u'Materials', u'Telecommunication Services', u'Utilities'], dtype='object')

In [140]:
sector = "Consumer Staples"
int_s = OrderedDict()
for yr in range(2009,2014):
    vs = np.array(ghgfins_s.loc[sector,yr]["1and2 intensity"].tolist())
    int_s[str(yr)] = vs[vs<500]

In [124]:
intensity = OrderedDict()
for yr in range(2009,2014):
    vs = np.array(ghgfins_yr.loc[yr]["1and2 intensity"].tolist())
    intensity[str(yr)] = vs[vs<300]

In [141]:
# histograms of intensities over time for each sector
title = "Scope 1 and 2 Intensity Consumer Staples"
fname = "12intyear_energy.html"
hist = Histogram(int_s, bins=20, filename=fname, title = title, legend="top_right",
                 ylabel="Proportion of Companies", xlabel="Intensity in tCO2e/$million")

In [142]:
show(hist)



In [ ]: